/* -*- c -*- */

/* The purpose of this module is to add faster math for array scalars
   that does not go through the ufunc machinery

   but still supports error-modes.
*/

#include "Python.h"
#include "numpy/noprefix.h"
#include "numpy/ufuncobject.h"
#include "numpy/arrayscalars.h"


/** numarray adapted routines.... **/

#if SIZEOF_LONGLONG == 64 || SIZEOF_LONGLONG == 128
static int ulonglong_overflow(ulonglong a, ulonglong b)
{
    ulonglong ah, al, bh, bl, w, x, y, z;

#if SIZEOF_LONGLONG == 64
    ah = (a >> 32);
    al = (a & 0xFFFFFFFFL);
    bh = (b >> 32);
    bl = (b & 0xFFFFFFFFL);
#elif SIZEOF_LONGLONG == 128
    ah = (a >> 64);
    al = (a & 0xFFFFFFFFFFFFFFFFL);
    bh = (b >> 64);
    bl = (b & 0xFFFFFFFFFFFFFFFFL);
#else
    ah = al = bh = bl = 0;
#endif

    /* 128-bit product:  z*2**64 + (x+y)*2**32 + w  */
    w = al*bl;
    x = bh*al;
    y = ah*bl;
    z = ah*bh;

    /* *c = ((x + y)<<32) + w; */
#if SIZEOF_LONGLONG == 64
    return z || (x>>32) || (y>>32) ||
        (((x & 0xFFFFFFFFL) + (y & 0xFFFFFFFFL) + (w >> 32)) >> 32);
#elif SIZEOF_LONGLONG == 128
    return z || (x>>64) || (y>>64) ||
        (((x & 0xFFFFFFFFFFFFFFFFL) + (y & 0xFFFFFFFFFFFFFFFFL) + (w >> 64)) >> 64);
#else
    return 0;
#endif

}
#else
static int ulonglong_overflow(ulonglong NPY_UNUSED(a), ulonglong NPY_UNUSED(b))
{
	return 0;
}
#endif

static int slonglong_overflow(longlong a0, longlong b0)
{
    ulonglong a, b;
    ulonglong ah, al, bh, bl, w, x, y, z;

    /* Convert to non-negative quantities */
    if (a0 < 0) {
        a = -a0;
    }
    else {
        a = a0;
    }
    if (b0 < 0) {
        b = -b0;
    }
    else {
        b = b0;
    }


#if SIZEOF_LONGLONG == 64
    ah = (a >> 32);
    al = (a & 0xFFFFFFFFL);
    bh = (b >> 32);
    bl = (b & 0xFFFFFFFFL);
#elif SIZEOF_LONGLONG == 128
    ah = (a >> 64);
    al = (a & 0xFFFFFFFFFFFFFFFFL);
    bh = (b >> 64);
    bl = (b & 0xFFFFFFFFFFFFFFFFL);
#else
    ah = al = bh = bl = 0;
#endif

    w = al*bl;
    x = bh*al;
    y = ah*bl;
    z = ah*bh;

    /*
      ulonglong c = ((x + y)<<32) + w;
      if ((a0 < 0) ^ (b0 < 0))
      *c = -c;
      else
      *c = c
      */

#if SIZEOF_LONGLONG == 64
    return z || (x>>31) || (y>>31) ||
        (((x & 0xFFFFFFFFL) + (y & 0xFFFFFFFFL) + (w >> 32)) >> 31);
#elif SIZEOF_LONGLONG == 128
    return z || (x>>63) || (y>>63) ||
        (((x & 0xFFFFFFFFFFFFFFFFL) + (y & 0xFFFFFFFFFFFFFFFFL) + (w >> 64)) >> 63);
#else
    return 0;
#endif
}
/** end direct numarray code **/


/* Basic operations:
 *
 *  BINARY:
 *
 * add, subtract, multiply, divide, remainder, divmod, power,
 * floor_divide, true_divide
 *
 * lshift, rshift, and, or, xor (integers only)
 *
 * UNARY:
 *
 * negative, positive, absolute, nonzero, invert, int, long, float, oct, hex
 *
 */

/**begin repeat
 *  #name = byte, short, int, long, longlong#
 */
static void
@name@_ctype_add(@name@ a, @name@ b, @name@ *out) {
    *out = a + b;
    if ((*out^a) >= 0 || (*out^b) >= 0) {
        return;
    }
    generate_overflow_error();
    return;
}
static void
@name@_ctype_subtract(@name@ a, @name@ b, @name@ *out) {
    *out = a - b;
    if ((*out^a) >= 0 || (*out^~b) >= 0) {
        return;
    }
    generate_overflow_error();
    return;
}
/**end repeat**/

/**begin repeat
 *  #name = ubyte, ushort, uint, ulong, ulonglong#
 */
static void
@name@_ctype_add(@name@ a, @name@ b, @name@ *out) {
    *out = a + b;
    if (*out >= a && *out >= b) {
        return;
    }
    generate_overflow_error();
    return;
}
static void
@name@_ctype_subtract(@name@ a, @name@ b, @name@ *out) {
    *out = a - b;
    if (a >= b) {
        return;
    }
    generate_overflow_error();
    return;
}
/**end repeat**/

#ifndef SIZEOF_BYTE
#define SIZEOF_BYTE 1
#endif

/**begin repeat
 *
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong#
 * #big = (int,uint)*2, (longlong,ulonglong)*2#
 * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG#
 * #SIZENAME = BYTE*2, SHORT*2, INT*2, LONG*2#
 * #SIZE = INT*4,LONGLONG*4#
 * #neg = (1,0)*4#
 */
#if SIZEOF_@SIZE@ > SIZEOF_@SIZENAME@
static void
@name@_ctype_multiply(@name@ a, @name@ b, @name@ *out) {
    @big@ temp;
    temp = ((@big@) a) * ((@big@) b);
    *out = (@name@) temp;
#if @neg@
    if (temp > MAX_@NAME@ || temp < MIN_@NAME@)
#else
        if (temp > MAX_@NAME@)
#endif
            generate_overflow_error();
    return;
}
#endif
/**end repeat**/

/**begin repeat
 *
 * #name = int, uint, long, ulong, longlong, ulonglong#
 * #SIZE = INT*2, LONG*2, LONGLONG*2#
 * #char = (s,u)*3#
 */
#if SIZEOF_LONGLONG == SIZEOF_@SIZE@
static void
@name@_ctype_multiply(@name@ a, @name@ b, @name@ *out) {
    *out = a * b;
    if (@char@longlong_overflow(a, b)) {
        generate_overflow_error();
    }
    return;
}
#endif
/**end repeat**/

/**begin repeat
 *
 * #name = byte, ubyte, short, ushort, int, uint, long,
 *         ulong, longlong, ulonglong#
 * #neg = (1,0)*5#
 */
static void
@name@_ctype_divide(@name@ a, @name@ b, @name@ *out) {
    if (b == 0) {
        generate_divbyzero_error();
        *out = 0;
    }
#if @neg@
    else if (b == -1 && a < 0 && a == -a) {
        generate_overflow_error();
        *out = a / b;
    }
#endif
    else {
#if @neg@
        @name@ tmp;
        tmp = a / b;
        if (((a > 0) != (b > 0)) && (a % b != 0)) {
            tmp--;
        }
        *out = tmp;
#else
        *out = a / b;
#endif
    }
}

#define @name@_ctype_floor_divide @name@_ctype_divide
static void
@name@_ctype_remainder(@name@ a, @name@ b, @name@ *out) {
    if (a == 0 || b == 0) {
        if (b == 0) generate_divbyzero_error();
        *out = 0;
        return;
    }
#if @neg@
    else if ((a > 0) == (b > 0)) {
        *out = a % b;
    }
    else {
        /* handled like Python does */
        *out = a % b;
        if (*out) *out += b;
    }
#else
    *out = a % b;
#endif
}
/**end repeat**/

/**begin repeat
 *
 * #name = byte, ubyte, short, ushort, int, uint, long,
 *         ulong, longlong, ulonglong#
 * #otyp = float*4, double*6#
 */
#define @name@_ctype_true_divide(a, b, out)     \
    *(out) = ((@otyp@) (a)) / ((@otyp@) (b));
/**end repeat**/

/* b will always be positive in this call */
/**begin repeat
 *
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong#
 * #upc = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG#
 */
static void
@name@_ctype_power(@name@ a, @name@ b, @name@ *out) {
    @name@ temp, ix, mult;
    /* code from Python's intobject.c, with overflow checking removed. */
    temp = a;
    ix = 1;
    while (b > 0) {
        if (b & 1) {
            @name@_ctype_multiply(ix, temp, &mult);
            ix = mult;
            if (temp == 0) {
                break;
            }
        }
        b >>= 1;        /* Shift exponent down by 1 bit */
        if (b==0) {
            break;
        }
        /* Square the value of temp */
        @name@_ctype_multiply(temp, temp, &mult);
        temp = mult;
    }
    *out = ix;
}
/**end repeat**/



/* QUESTION:  Should we check for overflow / underflow in (l,r)shift? */

/**begin repeat
 * #name = (byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*5#
 * #oper = and*10, xor*10, or*10, lshift*10, rshift*10#
 * #op = &*10, ^*10, |*10, <<*10, >>*10#
 */
#define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2)
/**end repeat**/

/**begin repeat
 * #name = float, double, longdouble#
 */
static @name@ (*_basic_@name@_floor)(@name@);
static @name@ (*_basic_@name@_sqrt)(@name@);
static @name@ (*_basic_@name@_fmod)(@name@, @name@);
#define @name@_ctype_add(a, b, outp) *(outp) = a + b
#define @name@_ctype_subtract(a, b, outp) *(outp) = a - b
#define @name@_ctype_multiply(a, b, outp) *(outp) = a * b
#define @name@_ctype_divide(a, b, outp) *(outp) = a / b
#define @name@_ctype_true_divide @name@_ctype_divide
#define @name@_ctype_floor_divide(a, b, outp)   \
    *(outp) = _basic_@name@_floor((a) / (b))
/**end repeat**/

/**begin repeat
 * #name = cfloat, cdouble, clongdouble#
 * #rtype = float, double, longdouble#
 * #c = f,,l#
 */
#define @name@_ctype_add(a, b, outp) do{        \
    (outp)->real = (a).real + (b).real;         \
    (outp)->imag = (a).imag + (b).imag;         \
    } while(0)
#define @name@_ctype_subtract(a, b, outp) do{   \
    (outp)->real = (a).real - (b).real;         \
    (outp)->imag = (a).imag - (b).imag;         \
    } while(0)
#define @name@_ctype_multiply(a, b, outp) do{                   \
    (outp)->real = (a).real * (b).real - (a).imag * (b).imag;   \
    (outp)->imag = (a).real * (b).imag + (a).imag * (b).real;   \
    } while(0)
#define @name@_ctype_divide(a, b, outp) do{                     \
    @rtype@ d = (b).real*(b).real + (b).imag*(b).imag;          \
    (outp)->real = ((a).real*(b).real + (a).imag*(b).imag)/d;   \
    (outp)->imag = ((a).imag*(b).real - (a).real*(b).imag)/d;   \
    } while(0)
#define @name@_ctype_true_divide @name@_ctype_divide
#define @name@_ctype_floor_divide(a, b, outp) do {      \
    (outp)->real = _basic_@rtype@_floor                 \
    (((a).real*(b).real + (a).imag*(b).imag) /          \
     ((b).real*(b).real + (b).imag*(b).imag));          \
    (outp)->imag = 0;                                   \
    } while(0)
/**end repeat**/

/**begin repeat
 * #name = float, double, longdouble#
 */
static void
@name@_ctype_remainder(@name@ a, @name@ b, @name@ *out) {
    @name@ mod;
    mod = _basic_@name@_fmod(a, b);
    if (mod && (((b < 0) != (mod < 0)))) {
        mod += b;
    }
    *out = mod;
}
/**end repeat**/



/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong,
 *         ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble#
 */
#define @name@_ctype_divmod(a, b, out, out2) {  \
    @name@_ctype_floor_divide(a, b, out);       \
    @name@_ctype_remainder(a, b, out2);         \
    }
/**end repeat**/

/**begin repeat
 * #name = float, double, longdouble#
 */
static @name@ (*_basic_@name@_pow)(@name@ a, @name@ b);
static void
@name@_ctype_power(@name@ a, @name@ b, @name@ *out) {
    *out = _basic_@name@_pow(a, b);
}
/**end repeat**/

/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong,
 *         ulonglong, float, double, longdouble#
 * #uns = (0,1)*5,0*3#
 */
static void
@name@_ctype_negative(@name@ a, @name@ *out)
{
#if @uns@
    generate_overflow_error();
#endif
    *out = -a;
}
/**end repeat**/


/**begin repeat
 * #name = cfloat, cdouble, clongdouble#
 */
static void
@name@_ctype_negative(@name@ a, @name@ *out)
{
    out->real = -a.real;
    out->imag = -a.imag;
}
/**end repeat**/

/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong,
 *         ulonglong, float, double, longdouble#
 */
static void
@name@_ctype_positive(@name@ a, @name@ *out)
{
    *out = a;
}
/**end repeat**/

/*
 * Get the nc_powf, nc_pow, and nc_powl functions from
 * the data area of the power ufunc in umathmodule.
 */

/**begin repeat
 * #name = cfloat, cdouble, clongdouble#
 */
static void
@name@_ctype_positive(@name@ a, @name@ *out)
{
    out->real = a.real;
    out->imag = a.imag;
}
static void (*_basic_@name@_pow)(@name@ *, @name@ *, @name@ *);
static void
@name@_ctype_power(@name@ a, @name@ b, @name@ *out)
{
    _basic_@name@_pow(&a, &b, out);
}
/**end repeat**/


/**begin repeat
 * #name = ubyte, ushort, uint, ulong, ulonglong#
 */
#define @name@_ctype_absolute @name@_ctype_positive
/**end repeat**/


/**begin repeat
 * #name = byte, short, int, long, longlong, float, double, longdouble#
 */
static void
@name@_ctype_absolute(@name@ a, @name@ *out)
{
    *out = (a < 0 ? -a : a);
}
/**end repeat**/

/**begin repeat
 * #name = cfloat, cdouble, clongdouble#
 * #rname = float, double, longdouble#
 */
static void
@name@_ctype_absolute(@name@ a, @rname@ *out)
{
    *out = _basic_@rname@_sqrt(a.real*a.real + a.imag*a.imag);
}
/**end repeat**/

/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long,
 *         ulong, longlong, ulonglong#
 */
#define @name@_ctype_invert(a, out) *(out) = ~a;
/**end repeat**/

/*** END OF BASIC CODE **/


/* The general strategy for commutative binary operators is to
 *
 * 1) Convert the types to the common type if both are scalars (0 return)
 * 2) If both are not scalars use ufunc machinery (-2 return)
 * 3) If both are scalars but cannot be cast to the right type
 * return NotImplmented (-1 return)
 *
 * 4) Perform the function on the C-type.
 * 5) If an error condition occurred, check to see
 * what the current error-handling is and handle the error.
 *
 * 6) Construct and return the output scalar.
 */

/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong,
 *         ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble#
 * #Name = Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong,
 *         ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble#
 * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG,
 *         ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE#
 */

static int
_@name@_convert_to_ctype(PyObject *a, @name@ *arg1)
{
    PyObject *temp;

    if (PyArray_IsScalar(a, @Name@)) {
        *arg1 = PyArrayScalar_VAL(a, @Name@);
        return 0;
    }
    else if (PyArray_IsScalar(a, Generic)) {
        PyArray_Descr *descr1;

        if (!PyArray_IsScalar(a, Number)) {
            return -1;
        }
        descr1 = PyArray_DescrFromTypeObject((PyObject *)(a->ob_type));
        if (PyArray_CanCastSafely(descr1->type_num, PyArray_@NAME@)) {
            PyArray_CastScalarDirect(a, descr1, arg1, PyArray_@NAME@);
            Py_DECREF(descr1);
            return 0;
        }
        else {
            Py_DECREF(descr1);
            return -1;
        }
    }
    else if (PyArray_GetPriority(a, PyArray_SUBTYPE_PRIORITY) >
            PyArray_SUBTYPE_PRIORITY) {
        return -2;
    }
    else if ((temp = PyArray_ScalarFromObject(a)) != NULL) {
        int retval = _@name@_convert_to_ctype(temp, arg1);

        Py_DECREF(temp);
        return retval;
    }
    return -2;
}

/**end repeat**/


/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong,
 *         longlong, ulonglong, float, double, cfloat, cdouble#
 */
static int
_@name@_convert2_to_ctypes(PyObject *a, @name@ *arg1,
                           PyObject *b, @name@ *arg2)
{
    int ret;
    ret = _@name@_convert_to_ctype(a, arg1);
    if (ret < 0) {
        return ret;
    }
    ret = _@name@_convert_to_ctype(b, arg2);
    if (ret < 0) {
        return ret;
    }
    return 0;
}
/**end repeat**/

/**begin repeat
 * #name = longdouble, clongdouble#
 */

static int
_@name@_convert2_to_ctypes(PyObject *a, @name@ *arg1,
                           PyObject *b, @name@ *arg2)
{
    int ret;
    ret = _@name@_convert_to_ctype(a, arg1);
    if (ret < 0) {
        return ret;
    }
    ret = _@name@_convert_to_ctype(b, arg2);
    if (ret == -2) {
        ret = -3;
    }
    if (ret < 0) {
        return ret;
    }
    return 0;
}

/**end repeat**/



/**begin repeat
   #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*13, (float, double, longdouble, cfloat, cdouble, clongdouble)*6, (float, double, longdouble)*2#
   #Name=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong)*13, (Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*6, (Float, Double, LongDouble)*2#
   #oper=add*10, subtract*10, multiply*10, divide*10, remainder*10, divmod*10, floor_divide*10, lshift*10, rshift*10, and*10, or*10, xor*10, true_divide*10, add*6, subtract*6, multiply*6, divide*6, floor_divide*6, true_divide*6, divmod*3, remainder*3#
   #fperr=1*70,0*50,1*52#
   #twoout=0*50,1*10,0*106,1*3,0*3#
   #otyp=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*12, float*4, double*6, (float, double, longdouble, cfloat, cdouble, clongdouble)*6, (float, double, longdouble)*2#
   #OName=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong)*12, Float*4, Double*6, (Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*6, (Float, Double, LongDouble)*2#
**/

static PyObject *
@name@_@oper@(PyObject *a, PyObject *b)
{
    PyObject *ret;
    @name@ arg1, arg2;
    @otyp@ out;
#if @twoout@
    @otyp@ out2;
    PyObject *obj;
#endif

#if @fperr@
    int retstatus;
    int first;
#endif

    switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) {
        case 0:
            break;
        case -1:
            /* one of them can't be cast safely must be mixed-types*/
            return PyArray_Type.tp_as_number->nb_@oper@(a,b);
        case -2:
            /* use default handling */
            if (PyErr_Occurred()) {
                return NULL;
            }
            return PyGenericArrType_Type.tp_as_number->nb_@oper@(a,b);
        case -3:
            /*
             * special case for longdouble and clongdouble
             * because they have a recursive getitem in their dtype
             */
            Py_INCREF(Py_NotImplemented);
            return Py_NotImplemented;
    }

#if @fperr@
    PyUFunc_clearfperr();
#endif

    /*
     * here we do the actual calculation with arg1 and arg2
     * as a function call.
     */
#if @twoout@
    @name@_ctype_@oper@(arg1, arg2, &out, &out2);
#else
    @name@_ctype_@oper@(arg1, arg2, &out);
#endif

#if @fperr@
    /* Check status flag.  If it is set, then look up what to do */
    retstatus = PyUFunc_getfperr();
    if (retstatus) {
        int bufsize, errmask;
        PyObject *errobj;

        if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask,
                                &errobj) < 0) {
            return NULL;
        }
        first = 1;
        if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) {
            Py_XDECREF(errobj);
            return NULL;
        }
        Py_XDECREF(errobj);
    }
#endif


#if @twoout@
    ret = PyTuple_New(2);
    if (ret == NULL) {
        return NULL;
    }
    obj = PyArrayScalar_New(@OName@);
    if (obj == NULL) {
        Py_DECREF(ret);
        return NULL;
    }
    PyArrayScalar_ASSIGN(obj, @OName@, out);
    PyTuple_SET_ITEM(ret, 0, obj);
    obj = PyArrayScalar_New(@OName@);
    if (obj == NULL) {
        Py_DECREF(ret);
        return NULL;
    }
    PyArrayScalar_ASSIGN(obj, @OName@, out2);
    PyTuple_SET_ITEM(ret, 1, obj);
#else
    ret = PyArrayScalar_New(@OName@);
    if (ret == NULL) {
        return NULL;
    }
    PyArrayScalar_ASSIGN(ret, @OName@, out);
#endif
    return ret;
}
/**end repeat**/

/**begin repeat
   #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float, double, longdouble, cfloat, cdouble, clongdouble#
   #Name=Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble#
   #otyp=float*4, double*6, float, double, longdouble, cfloat, cdouble, clongdouble#
   #OName=Float*4, Double*6, Float, Double, LongDouble, CFloat, CDouble, CLongDouble#
   #isint=(1,0)*5,0*6#
   #cmplx=0*13,1*3#
**/

static PyObject *
@name@_power(PyObject *a, PyObject *b, PyObject *NPY_UNUSED(c))
{
    PyObject *ret;
    @name@ arg1, arg2;
    int retstatus;
    int first;

#if @cmplx@
    @name@ out = {0,0};
    @otyp@ out1;
    out1.real = out.imag = 0;
#else
    @name@ out = 0;
    @otyp@ out1=0;
#endif

    switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) {
        case 0:
            break;
        case -1:
            /* can't cast both safely mixed-types? */
            return PyArray_Type.tp_as_number->nb_power(a,b,NULL);
        case -2:
            /* use default handling */
            if (PyErr_Occurred()) {
                return NULL;
            }
            return PyGenericArrType_Type.tp_as_number->nb_power(a,b,NULL);
        case -3:
            /*
             * special case for longdouble and clongdouble
             * because they have a recursive getitem in their dtype
             */
            Py_INCREF(Py_NotImplemented);
            return Py_NotImplemented;
    }

    PyUFunc_clearfperr();

    /*
     * here we do the actual calculation with arg1 and arg2
     * as a function call.
     */
#if @cmplx@
    if (arg2.real == 0 && arg1.real == 0) {
        out1.real = out.real = 1;
        out1.imag = out.imag = 0;
    }
#else
    if (arg2 == 0) {
        out1 = out = 1;
    }
#endif
#if @isint@
    else if (arg2 < 0) {
        @name@_ctype_power(arg1, -arg2, &out);
        out1 = (@otyp@) (1.0 / out);
    }
#endif
    else {
        @name@_ctype_power(arg1, arg2, &out);
    }

    /* Check status flag.  If it is set, then look up what to do */
    retstatus = PyUFunc_getfperr();
    if (retstatus) {
        int bufsize, errmask;
        PyObject *errobj;

        if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask,
                                &errobj) < 0) {
            return NULL;
        }
        first = 1;
        if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) {
            Py_XDECREF(errobj);
            return NULL;
        }
        Py_XDECREF(errobj);
    }

#if @isint@
    if (arg2 < 0) {
        ret = PyArrayScalar_New(@OName@);
        if (ret == NULL) {
            return NULL;
        }
        PyArrayScalar_ASSIGN(ret, @OName@, out1);
    }
    else {
        ret = PyArrayScalar_New(@Name@);
        if (ret == NULL) {
            return NULL;
        }
        PyArrayScalar_ASSIGN(ret, @Name@, out);
    }
#else
    ret = PyArrayScalar_New(@Name@);
    if (ret == NULL) {
        return NULL;
    }
    PyArrayScalar_ASSIGN(ret, @Name@, out);
#endif

    return ret;
}
/**end repeat**/


/**begin repeat
 * #name = (cfloat,cdouble,clongdouble)*2#
 * #oper = divmod*3,remainder*3#
 */
#define @name@_@oper@ NULL
/**end repeat**/

/**begin repeat
 * #name = (float,double,longdouble,cfloat,cdouble,clongdouble)*5#
 * #oper = lshift*6, rshift*6, and*6, or*6, xor*6#
 */
#define @name@_@oper@ NULL
/**end repeat**/


/**begin repeat
 * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*3, byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong#
 * #otyp=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*2,byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,float,double,longdouble,byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong#
 * #OName=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*2, Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, Float, Double, LongDouble, Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong#
 * #oper=negative*16, positive*16, absolute*16, invert*10#
 */
static PyObject *
@name@_@oper@(PyObject *a)
{
    @name@ arg1;
    @otyp@ out;
    PyObject *ret;

    switch(_@name@_convert_to_ctype(a, &arg1)) {
    case 0:
        break;
    case -1:
        /* can't cast both safely use different add function */
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    case -2:
        /* use default handling */
        if (PyErr_Occurred()) {
            return NULL;
        }
        return PyGenericArrType_Type.tp_as_number->nb_@oper@(a);
    }

    /*
     * here we do the actual calculation with arg1 and arg2
     * make it a function call.
     */

    @name@_ctype_@oper@(arg1, &out);

    ret = PyArrayScalar_New(@OName@);
    PyArrayScalar_ASSIGN(ret, @OName@, out);

    return ret;
}
/**end repeat**/

/**begin repeat
 * #name = float, double, longdouble, cfloat, cdouble, clongdouble#
 */
#define @name@_invert NULL
/**end repeat**/

/**begin repeat
 * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong,
 *         ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble#
 * #simp=1*13,0*3#
 */
static int
@name@_nonzero(PyObject *a)
{
    int ret;
    @name@ arg1;

    if (_@name@_convert_to_ctype(a, &arg1) < 0) {
        if (PyErr_Occurred()) {
            return -1;
        }
        return PyGenericArrType_Type.tp_as_number->nb_nonzero(a);
    }

    /*
     * here we do the actual calculation with arg1 and arg2
     * make it a function call.
     */

#if @simp@
    ret = (arg1 != 0);
#else
    ret = ((arg1.real != 0) || (arg1.imag != 0));
#endif

    return ret;
}
/**end repeat**/

/**begin repeat
 *
 * #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble#
 * #Name=Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong,Float,Double,LongDouble,CFloat,CDouble,CLongDouble#
 * #cmplx=,,,,,,,,,,,,,.real,.real,.real#
 * #sign=(signed,unsigned)*5,,,,,,#
 * #unsigntyp=0,1,0,1,0,1,0,1,0,1,1*6#
 * #ctype=long*8,PY_LONG_LONG*2,double*6#
 * #realtyp=0*10,1*6#
 * #func=(PyLong_FromLong,PyLong_FromUnsignedLong)*4,PyLong_FromLongLong,PyLong_FromUnsignedLongLong,PyLong_FromDouble*6#
 */
static PyObject *
@name@_int(PyObject *obj)
{
    @sign@ @ctype@ x= PyArrayScalar_VAL(obj, @Name@)@cmplx@;
#if @realtyp@
    double ix;
    modf(x, &ix);
    x = ix;
#endif

/*
 * For unsigned type, the (@ctype@) cast just does what is implicitely done by
 * the compiler.
 */
#if @unsigntyp@
    if(LONG_MIN < (@ctype@)x && (@ctype@)x < LONG_MAX)
        return PyInt_FromLong(x);
#else
    if(LONG_MIN < x && x < LONG_MAX)
        return PyInt_FromLong(x);
#endif
    return @func@(x);
}
/**end repeat**/

/**begin repeat
 *
 * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*2#
 * #Name=(Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong,Float,Double,LongDouble,CFloat,CDouble,CLongDouble)*2#
 * #cmplx=(,,,,,,,,,,,,,.real,.real,.real)*2#
 * #which=long*16,float*16#
 * #func=(PyLong_FromLongLong, PyLong_FromUnsignedLongLong)*5,PyLong_FromDouble*6,PyFloat_FromDouble*16#
 */
static PyObject *
@name@_@which@(PyObject *obj)
{
    return @func@((PyArrayScalar_VAL(obj, @Name@))@cmplx@);
}
/**end repeat**/


/**begin repeat
 *
 * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*2#
 * #oper=oct*16, hex*16#
 * #kind=(int*5, long*5, int, long*2, int, long*2)*2#
 * #cap=(Int*5, Long*5, Int, Long*2, Int, Long*2)*2#
 */
static PyObject *
@name@_@oper@(PyObject *obj)
{
    PyObject *pyint;
    pyint = @name@_@kind@(obj);
    if (pyint == NULL) return NULL;
    return Py@cap@_Type.tp_as_number->nb_@oper@(pyint);
}
/**end repeat**/


/**begin repeat
 * #oper=le,ge,lt,gt,eq,ne#
 * #op=<=,>=,<,>,==,!=#
 */
#define def_cmp_@oper@(arg1, arg2) (arg1 @op@ arg2)
#define cmplx_cmp_@oper@(arg1, arg2) ((arg1.real == arg2.real) ?        \
                                      arg1.imag @op@ arg2.imag :        \
                                      arg1.real @op@ arg2.real)
/**end repeat**/

/**begin repeat
 * #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble#
 * #simp=def*13,cmplx*3#
 */
static PyObject*
@name@_richcompare(PyObject *self, PyObject *other, int cmp_op)
{
    @name@ arg1, arg2;
    int out=0;

    switch(_@name@_convert2_to_ctypes(self, &arg1, other, &arg2)) {
    case 0:
        break;
    case -1: /* can't cast both safely use different add function */
    case -2: /* use ufunc */
        if (PyErr_Occurred()) return NULL;
        return PyGenericArrType_Type.tp_richcompare(self, other, cmp_op);
    case -3: /* special case for longdouble and clongdouble
		because they have a recursive getitem in their dtype */
        Py_INCREF(Py_NotImplemented);
	return Py_NotImplemented;
    }

    /* here we do the actual calculation with arg1 and arg2 */
    switch (cmp_op) {
    case Py_EQ:
        out = @simp@_cmp_eq(arg1, arg2);
        break;
    case Py_NE:
        out = @simp@_cmp_ne(arg1, arg2);
        break;
    case Py_LE:
        out = @simp@_cmp_le(arg1, arg2);
        break;
    case Py_GE:
        out = @simp@_cmp_ge(arg1, arg2);
        break;
    case Py_LT:
        out = @simp@_cmp_lt(arg1, arg2);
        break;
    case Py_GT:
        out = @simp@_cmp_gt(arg1, arg2);
        break;
    }

    if (out) {
        PyArrayScalar_RETURN_TRUE;
    }
    else {
        PyArrayScalar_RETURN_FALSE;
    }
}
/**end repeat**/


/**begin repeat
   #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble#
**/
static PyNumberMethods @name@_as_number = {
    (binaryfunc)@name@_add,                    /*nb_add*/
    (binaryfunc)@name@_subtract,               /*nb_subtract*/
    (binaryfunc)@name@_multiply,               /*nb_multiply*/
    (binaryfunc)@name@_divide,                 /*nb_divide*/
    (binaryfunc)@name@_remainder,              /*nb_remainder*/
    (binaryfunc)@name@_divmod,                 /*nb_divmod*/
    (ternaryfunc)@name@_power,                 /*nb_power*/
    (unaryfunc)@name@_negative,
    (unaryfunc)@name@_positive,                /*nb_pos*/
    (unaryfunc)@name@_absolute,                /*nb_abs*/
    (inquiry)@name@_nonzero,                   /*nb_nonzero*/
    (unaryfunc)@name@_invert,                  /*nb_invert*/
    (binaryfunc)@name@_lshift,                /*nb_lshift*/
    (binaryfunc)@name@_rshift,                /*nb_rshift*/
    (binaryfunc)@name@_and,                  /*nb_and*/
    (binaryfunc)@name@_xor,                  /*nb_xor*/
    (binaryfunc)@name@_or,                   /*nb_or*/
    0,                                      /*nb_coerce*/
    (unaryfunc)@name@_int,                   /*nb_int*/
    (unaryfunc)@name@_long,                  /*nb_long*/
    (unaryfunc)@name@_float,                 /*nb_float*/
    (unaryfunc)@name@_oct,                   /*nb_oct*/
    (unaryfunc)@name@_hex,                  /*nb_hex*/
    0,                                     /*inplace_add*/
    0,                                     /*inplace_subtract*/
    0,                                     /*inplace_multiply*/
    0,                                     /*inplace_divide*/
    0,                                    /*inplace_remainder*/
    0,                              /*inplace_power*/
    0,                            /*inplace_lshift*/
    0,                            /*inplace_rshift*/
    0,                            /*inplace_and*/
    0,                            /*inplace_xor*/
    0,                            /*inplace_or*/
    (binaryfunc)@name@_floor_divide,            /*nb_floor_divide*/
    (binaryfunc)@name@_true_divide,             /*nb_true_divide*/
    0,                                         /*nb_inplace_floor_divide*/
    0,                                         /*nb_inplace_true_divide*/
#if PY_VERSION_HEX >= 0x02050000
    (unaryfunc)NULL,                      /*nb_index*/
#endif
};
/**end repeat**/

static void *saved_tables_arrtype[9];

static void
add_scalarmath(void)
{
    /**begin repeat
       #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble#
       #NAME=Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble#
    **/
#if PY_VERSION_HEX >= 0x02050000
    @name@_as_number.nb_index = Py@NAME@ArrType_Type.tp_as_number->nb_index;
#endif
    Py@NAME@ArrType_Type.tp_as_number = &(@name@_as_number);
    Py@NAME@ArrType_Type.tp_richcompare = @name@_richcompare;
    /**end repeat**/

    saved_tables_arrtype[0] = PyLongArrType_Type.tp_as_number;
    saved_tables_arrtype[1] = PyLongArrType_Type.tp_compare;
    saved_tables_arrtype[2] = PyLongArrType_Type.tp_richcompare;
    saved_tables_arrtype[3] = PyDoubleArrType_Type.tp_as_number;
    saved_tables_arrtype[4] = PyDoubleArrType_Type.tp_compare;
    saved_tables_arrtype[5] = PyDoubleArrType_Type.tp_richcompare;
    saved_tables_arrtype[6] = PyCDoubleArrType_Type.tp_as_number;
    saved_tables_arrtype[7] = PyCDoubleArrType_Type.tp_compare;
    saved_tables_arrtype[8] = PyCDoubleArrType_Type.tp_richcompare;
}

static int
get_functions(void)
{
    PyObject *mm, *obj;
    void **funcdata;
    char *signatures;
    int i, j;
    int ret = -1;

    /* Get the nc_pow functions */
    /* Get the pow functions */
    mm = PyImport_ImportModule("numpy.core.umath");
    if (mm == NULL) return -1;

    obj = PyObject_GetAttrString(mm, "power");
    if (obj == NULL) goto fail;
    funcdata = ((PyUFuncObject *)obj)->data;
    signatures = ((PyUFuncObject *)obj)->types;

    i = 0;
    j = 0;
    while(signatures[i] != PyArray_FLOAT) {i+=3; j++;}
    _basic_float_pow = funcdata[j];
    _basic_double_pow = funcdata[j+1];
    _basic_longdouble_pow = funcdata[j+2];
    _basic_cfloat_pow = funcdata[j+3];
    _basic_cdouble_pow = funcdata[j+4];
    _basic_clongdouble_pow = funcdata[j+5];
    Py_DECREF(obj);

    /* Get the floor functions */
    obj = PyObject_GetAttrString(mm, "floor");
    if (obj == NULL) goto fail;
    funcdata = ((PyUFuncObject *)obj)->data;
    signatures = ((PyUFuncObject *)obj)->types;
    i = 0;
    j = 0;
    while(signatures[i] != PyArray_FLOAT) {i+=2; j++;}
    _basic_float_floor = funcdata[j];
    _basic_double_floor = funcdata[j+1];
    _basic_longdouble_floor = funcdata[j+2];
    Py_DECREF(obj);

    /* Get the sqrt functions */
    obj = PyObject_GetAttrString(mm, "sqrt");
    if (obj == NULL) goto fail;
    funcdata = ((PyUFuncObject *)obj)->data;
    signatures = ((PyUFuncObject *)obj)->types;
    i = 0;
    j = 0;
    while(signatures[i] != PyArray_FLOAT) {i+=2; j++;}
    _basic_float_sqrt = funcdata[j];
    _basic_double_sqrt = funcdata[j+1];
    _basic_longdouble_sqrt = funcdata[j+2];
    Py_DECREF(obj);

    /* Get the fmod functions */
    obj = PyObject_GetAttrString(mm, "fmod");
    if (obj == NULL) goto fail;
    funcdata = ((PyUFuncObject *)obj)->data;
    signatures = ((PyUFuncObject *)obj)->types;
    i = 0;
    j = 0;
    while(signatures[i] != PyArray_FLOAT) {i+=3; j++;}
    _basic_float_fmod = funcdata[j];
    _basic_double_fmod = funcdata[j+1];
    _basic_longdouble_fmod = funcdata[j+2];
    Py_DECREF(obj);
    return

        ret = 0;
 fail:
    Py_DECREF(mm);
    return ret;
}

static void *saved_tables[9];

char doc_alterpyscalars[] = "";

static PyObject *
alter_pyscalars(PyObject *NPY_UNUSED(dummy), PyObject *args)
{
    int n;
    PyObject *obj;
    n = PyTuple_GET_SIZE(args);
    while(n--) {
        obj = PyTuple_GET_ITEM(args, n);
        if (obj == (PyObject *)(&PyInt_Type)) {
            PyInt_Type.tp_as_number = PyLongArrType_Type.tp_as_number;
            PyInt_Type.tp_compare = PyLongArrType_Type.tp_compare;
            PyInt_Type.tp_richcompare = PyLongArrType_Type.tp_richcompare;
        }
        else if (obj == (PyObject *)(&PyFloat_Type)) {
            PyFloat_Type.tp_as_number = PyDoubleArrType_Type.tp_as_number;
            PyFloat_Type.tp_compare = PyDoubleArrType_Type.tp_compare;
            PyFloat_Type.tp_richcompare = PyDoubleArrType_Type.tp_richcompare;
        }
        else if (obj == (PyObject *)(&PyComplex_Type)) {
            PyComplex_Type.tp_as_number = PyCDoubleArrType_Type.tp_as_number;
            PyComplex_Type.tp_compare = PyCDoubleArrType_Type.tp_compare;
            PyComplex_Type.tp_richcompare =             \
                PyCDoubleArrType_Type.tp_richcompare;
        }
        else {
            PyErr_SetString(PyExc_ValueError,
                            "arguments must be int, float, or complex");
            return NULL;
        }
    }
    Py_INCREF(Py_None);
    return Py_None;
}

char doc_restorepyscalars[] = "";
static PyObject *
restore_pyscalars(PyObject *NPY_UNUSED(dummy), PyObject *args)
{
    int n;
    PyObject *obj;
    n = PyTuple_GET_SIZE(args);
    while(n--) {
        obj = PyTuple_GET_ITEM(args, n);
        if (obj == (PyObject *)(&PyInt_Type)) {
            PyInt_Type.tp_as_number = saved_tables[0];
            PyInt_Type.tp_compare = saved_tables[1];
            PyInt_Type.tp_richcompare = saved_tables[2];
        }
        else if (obj == (PyObject *)(&PyFloat_Type)) {
            PyFloat_Type.tp_as_number = saved_tables[3];
            PyFloat_Type.tp_compare = saved_tables[4];
            PyFloat_Type.tp_richcompare = saved_tables[5];
        }
        else if (obj == (PyObject *)(&PyComplex_Type)) {
            PyComplex_Type.tp_as_number = saved_tables[6];
            PyComplex_Type.tp_compare = saved_tables[7];
            PyComplex_Type.tp_richcompare = saved_tables[8];
        }
        else {
            PyErr_SetString(PyExc_ValueError,
                            "arguments must be int, float, or complex");
            return NULL;
        }
    }
    Py_INCREF(Py_None);
    return Py_None;
}

char doc_usepythonmath[] = "";
static PyObject *
use_pythonmath(PyObject *NPY_UNUSED(dummy), PyObject *args)
{
    int n;
    PyObject *obj;
    n = PyTuple_GET_SIZE(args);
    while(n--) {
        obj = PyTuple_GET_ITEM(args, n);
        if (obj == (PyObject *)(&PyInt_Type)) {
            PyLongArrType_Type.tp_as_number = saved_tables[0];
            PyLongArrType_Type.tp_compare = saved_tables[1];
            PyLongArrType_Type.tp_richcompare = saved_tables[2];
        }
        else if (obj == (PyObject *)(&PyFloat_Type)) {
            PyDoubleArrType_Type.tp_as_number = saved_tables[3];
            PyDoubleArrType_Type.tp_compare = saved_tables[4];
            PyDoubleArrType_Type.tp_richcompare = saved_tables[5];
        }
        else if (obj == (PyObject *)(&PyComplex_Type)) {
            PyCDoubleArrType_Type.tp_as_number = saved_tables[6];
            PyCDoubleArrType_Type.tp_compare = saved_tables[7];
            PyCDoubleArrType_Type.tp_richcompare = saved_tables[8];
        }
        else {
            PyErr_SetString(PyExc_ValueError,
                            "arguments must be int, float, or complex");
            return NULL;
        }
    }
    Py_INCREF(Py_None);
    return Py_None;
}

char doc_usescalarmath[] = "";
static PyObject *
use_scalarmath(PyObject *NPY_UNUSED(dummy), PyObject *args)
{
    int n;
    PyObject *obj;
    n = PyTuple_GET_SIZE(args);
    while(n--) {
        obj = PyTuple_GET_ITEM(args, n);
        if (obj == (PyObject *)(&PyInt_Type)) {
            PyLongArrType_Type.tp_as_number = saved_tables_arrtype[0];
            PyLongArrType_Type.tp_compare = saved_tables_arrtype[1];
            PyLongArrType_Type.tp_richcompare = saved_tables_arrtype[2];
        }
        else if (obj == (PyObject *)(&PyFloat_Type)) {
            PyDoubleArrType_Type.tp_as_number = saved_tables_arrtype[3];
            PyDoubleArrType_Type.tp_compare = saved_tables_arrtype[4];
            PyDoubleArrType_Type.tp_richcompare = saved_tables_arrtype[5];
        }
        else if (obj == (PyObject *)(&PyComplex_Type)) {
            PyCDoubleArrType_Type.tp_as_number = saved_tables_arrtype[6];
            PyCDoubleArrType_Type.tp_compare = saved_tables_arrtype[7];
            PyCDoubleArrType_Type.tp_richcompare = saved_tables_arrtype[8];
        }
        else {
            PyErr_SetString(PyExc_ValueError,
                            "arguments must be int, float, or complex");
            return NULL;
        }
    }
    Py_INCREF(Py_None);
    return Py_None;
}

static struct PyMethodDef methods[] = {
    {"alter_pythonmath", (PyCFunction) alter_pyscalars,
     METH_VARARGS, doc_alterpyscalars},
    {"restore_pythonmath", (PyCFunction) restore_pyscalars,
     METH_VARARGS, doc_restorepyscalars},
    {"use_pythonmath", (PyCFunction) use_pythonmath,
     METH_VARARGS, doc_usepythonmath},
    {"use_scalarmath", (PyCFunction) use_scalarmath,
     METH_VARARGS, doc_usescalarmath},
    {NULL, NULL, 0, NULL}
};

PyMODINIT_FUNC initscalarmath(void) {

    Py_InitModule("scalarmath", methods);

    import_array();
    import_umath();

    if (get_functions() < 0) return;

    add_scalarmath();

    saved_tables[0] = PyInt_Type.tp_as_number;
    saved_tables[1] = PyInt_Type.tp_compare;
    saved_tables[2] = PyInt_Type.tp_richcompare;
    saved_tables[3] = PyFloat_Type.tp_as_number;
    saved_tables[4] = PyFloat_Type.tp_compare;
    saved_tables[5] = PyFloat_Type.tp_richcompare;
    saved_tables[6] = PyComplex_Type.tp_as_number;
    saved_tables[7] = PyComplex_Type.tp_compare;
    saved_tables[8] = PyComplex_Type.tp_richcompare;

    return;
}
